source("functions.R")
library(GenomicAlignments)
library(Rsamtools)
library(tidyverse)
library(Rsubread)
library(magrittr)
# design table
design_table <- read_table_in("RNASEQ_QUANTIFICATION/design_table.tsv")
DT::datatable(design_table, options = list(scrollX = TRUE))
# gene level quantification (TPMs)
quant_gene_level <- read_table_in("RNASEQ_QUANTIFICATION/merged_quantification_genes.tsv")
DT::datatable(quant_gene_level[1:100,], options = list(scrollX = TRUE))
# transcript level quantification (TPMs)
quant_transcript_level <- read_table_in("RNASEQ_QUANTIFICATION/merged_quantification_transcripts.tsv")
DT::datatable(quant_transcript_level[1:100,], options = list(scrollX = TRUE))
# design table
DT::datatable(read.csv("ATACSEQ/design.csv"),options = list(scrollX = TRUE))
Output from nf-core ATACseq pipeline (https://nf-co.re/atacseq)
# bam/bai
ATACSEQ/all_regions/bam
# bigwig
ATACSEQ/all_regions/bigwig
# macs2 narrow peaks
ATACSEQ/all_regions/narrowPeak
# qc summary
ATACSEQ/all_regions/multiqc_data
I filtered bams from ATACSEQ/all_regions/bam with
samtools and did peak calling on them:
# filtereing bams with samtools and awk
samtools view -h ${f} | \
awk 'substr($0,1,1)=="@" || ($9<= 100 && $9>0) || ($9>=-100 && $9<0)' | \
samtools view -@ 8 -b > ./bam/$sample.ncfree.bam
samtools sort -@ 8 ./bam/$sample.ncfree.bam -o ./bam/$sample.ncfree.sorted.bam
samtools flagstat -@ 8 ./bam/$sample.ncfree.sorted.bam > ./bam/$sample.ncfree.sorted.bam.stats
samtools index ./bam/$sample.ncfree.sorted.bam
# macs2 peak calling
macs2 callpeak -t ${f} -f BAMPE --outdir macs2_peaks --name ${sample} -g 269400000
# bam/bai
ATACSEQ/nucleosome_free_regions/bam
# macs2 narrow peaks
ATACSEQ/nucleosome_free_regions/macs2_peaks
library(GenomicAlignments)
library(tidyverse)
atacReads <- readGAlignmentPairs(
"ATACSEQ/all_regions/bam/Elav_neg_R1.mLb.clN.sorted.bam",
param = ScanBamParam(mapqFilter = 1,
flag = scanBamFlag(isPaired = TRUE,
isProperPair = TRUE),
what = c("qname", "mapq", "isize")))
atacReads_read1 <- GenomicAlignments::first(atacReads)
insertSizes <- abs(elementMetadata(atacReads_read1)$isize)
head(insertSizes)
## [1] 280 441 75 65 207 341
fragLenPlot <- table(insertSizes) %>% data.frame %>% rename(InsertSize = insertSizes,
Count = Freq) %>% mutate(InsertSize = as.numeric(as.vector(InsertSize)),
Count = as.numeric(as.vector(Count))) %>% ggplot(aes(x = InsertSize, y = Count)) +
geom_line()
fragLenPlot + geom_vline(xintercept = c(180, 247), colour = "red") +
geom_vline(xintercept = c(315, 437), colour = "darkblue") +
geom_vline(xintercept = c(100), colour = "darkgreen") +
theme_bw()
library(GenomicAlignments)
library(tidyverse)
atacReads <- readGAlignmentPairs(
"ATACSEQ/nucleosome_free_regions/bam/Elav_neg_R1.mLb.clN.ncfree.bam",
param = ScanBamParam(mapqFilter = 1,
flag = scanBamFlag(isPaired = TRUE,
isProperPair = TRUE),
what = c("qname", "mapq", "isize")))
atacReads_read1 <- GenomicAlignments::first(atacReads)
insertSizes <- abs(elementMetadata(atacReads_read1)$isize)
head(insertSizes)
## [1] 75 65 48 61 57 43
fragLenPlot <- table(insertSizes) %>% data.frame %>% rename(InsertSize = insertSizes,
Count = Freq) %>% mutate(InsertSize = as.numeric(as.vector(InsertSize)),
Count = as.numeric(as.vector(Count))) %>% ggplot(aes(x = InsertSize, y = Count)) +
geom_line()
fragLenPlot + geom_vline(xintercept = c(180, 247), colour = "red") +
geom_vline(xintercept = c(315, 437), colour = "darkblue") +
geom_vline(xintercept = c(100), colour = "darkgreen") +
theme_bw()
path = "ATACSEQ/nucleosome_free_regions/bam/"
for(i in list.files(path, pattern = ".sorted.bam$")){
print(idxstatsBam(paste0(path,i)) %>%
ggplot(aes(seqnames, mapped, fill = seqnames)) +
geom_bar(stat = "identity") + coord_flip() + ggtitle(i))}
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] forcats_0.5.2 stringr_1.4.1
## [3] dplyr_1.0.10 purrr_0.3.4
## [5] readr_2.1.2 tidyr_1.2.1
## [7] tibble_3.1.8 ggplot2_3.3.6
## [9] tidyverse_1.3.2 GenomicAlignments_1.32.1
## [11] Rsamtools_2.12.0 Biostrings_2.64.1
## [13] XVector_0.36.0 SummarizedExperiment_1.26.1
## [15] Biobase_2.56.0 MatrixGenerics_1.8.1
## [17] matrixStats_0.62.0 GenomicRanges_1.48.0
## [19] GenomeInfoDb_1.32.4 IRanges_2.30.1
## [21] S4Vectors_0.34.0 BiocGenerics_0.42.0
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-7 fs_1.5.2 lubridate_1.8.0
## [4] httr_1.4.4 tools_4.2.2 backports_1.4.1
## [7] bslib_0.4.0 utf8_1.2.2 R6_2.5.1
## [10] DT_0.25 DBI_1.1.3 colorspace_2.0-3
## [13] withr_2.5.0 tidyselect_1.1.2 compiler_4.2.2
## [16] rvest_1.0.3 cli_3.4.1 xml2_1.3.3
## [19] DelayedArray_0.22.0 labeling_0.4.2 sass_0.4.2
## [22] scales_1.2.1 digest_0.6.29 rmarkdown_2.16
## [25] pkgconfig_2.0.3 htmltools_0.5.3 highr_0.9
## [28] dbplyr_2.2.1 fastmap_1.1.0 htmlwidgets_1.5.4
## [31] rlang_1.0.6 readxl_1.4.1 rstudioapi_0.14
## [34] farver_2.1.1 jquerylib_0.1.4 generics_0.1.3
## [37] jsonlite_1.8.0 crosstalk_1.2.0 BiocParallel_1.30.3
## [40] googlesheets4_1.0.1 RCurl_1.98-1.8 magrittr_2.0.3
## [43] GenomeInfoDbData_1.2.8 Matrix_1.5-1 munsell_0.5.0
## [46] fansi_1.0.3 lifecycle_1.0.2 stringi_1.7.8
## [49] yaml_2.3.5 zlibbioc_1.42.0 grid_4.2.2
## [52] parallel_4.2.2 crayon_1.5.1 lattice_0.20-45
## [55] haven_2.5.1 hms_1.1.2 knitr_1.40
## [58] pillar_1.8.1 codetools_0.2-18 reprex_2.0.2
## [61] glue_1.6.2 evaluate_0.16 modelr_0.1.9
## [64] vctrs_0.4.1 tzdb_0.3.0 cellranger_1.1.0
## [67] gtable_0.3.1 assertthat_0.2.1 cachem_1.0.6
## [70] xfun_0.33 broom_1.0.1 googledrive_2.0.0
## [73] gargle_1.2.1 ellipsis_0.3.2